*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   	 Local Data 
*
*===============================================================================


********************************************************************************
*						Information experiment data clean 	 				   *
********************************************************************************



******************************************
* cleaning data
******************************************


* Set Paths and Settings

* Start from an empty session: drop data in memory, disable output paging,
* and close any log that is still open (capture ignores the error if none is).
clear
set more off        
cap log close

	
* Load data:
* ---------------------------------------------------------------------------- *

* $exp_data is a global macro that is not defined in this file; it must be set
* before this script runs.
use "$exp_data/raw_exp", clear 
* This is the raw data; the only change made to it is that identifying information (e.g. IP addresses and exact location) has been removed.

* Sample Restriction:
* ---------------------------------------------------------------------------- *

* Exclude responses recorded through the survey's preview channel
tab distributionchannel
drop if distributionchannel == "preview" // 9,225 remain 


* Inspect completion progress (the restriction on finished is applied further below)
tab progress

* Keep only respondents whose consent item (q171) equals 4
tab q171
keep if q171 == 4			// consent, 9,037 remain


* De-duplicate by IP address: within each address keep the first observation
* in startdate sort order
bysort ipaddress (startdate): drop if _n>1 // 8,274 remain 

* De-duplicate by id in the same way; observations with missing id are never dropped here
bysort id (startdate): drop if _n>1 & id!=. // 8,023 remain 


* First attention check: tabulate by pilot / prespecified sample, then keep passers
tab attention if prespecified==0, m
tab attention if prespecified==1, m
tab attention, m
keep if attention==1 // 6,693 remain 


* Keep only employmentstatus==1 (missing values are dropped as well)
tab employmentstatus
keep if employmentstatus==1 // not full-time employed, 5,732


* Exclude civil servants
tab öffentlicherDienst
drop if öffentlicherDienst==1 // civil servant
 
* Exclude the self-employed
tab selfemployed
drop if selfemployed==1 // self-employed, 4,889 remain 


* Exclude respondents who report that the assigned occupation fits poorly (codes 3 and 4)
tab occupation_accuracy

drop if inlist(occupation_accuracy, 3, 4) // occupation doesn't fit well, 4,253 remain 


* Keep finished surveys only; the two completion variables are not needed afterwards
tab finished 	
keep if finished==1
drop progress finished // 3,741 remain 

* dropping people with implausible earnings numbers
* For each earnings-type answer: strip formatting, convert to numeric, then drop
* observations outside [0.2, 3] x meanwage_raw2 or with a missing value.
foreach earn in pretaxwageearning pretreatment_oo point_more post_outsideoption {
		
		* The string clean-up steps are wrapped in capture so that they are
		* silently skipped when the variable is already numeric.
		cap quietly replace `earn' = substr(`earn', 1, strpos(`earn', ",")-1) if strpos(`earn', ",")>0 // remove digits after decimal separator
		cap quietly replace `earn' = subinstr(`earn',".","",.) // removing thousands separators
		cap quietly replace `earn' = stritrim(strtrim(`earn')) // trim outer blanks, collapse inner ones
		
		* force: anything still non-numeric becomes missing (and is dropped below)
		quietly destring `earn', force replace
		
		drop if `earn'==. | `earn'<(0.2*meanwage_raw2) | `earn'>(3*meanwage_raw2)
} // 3,510 remain 
	
	
		  
* checking attention for treated group
* Signed gap the respondent reports having been shown: negative of item _1
* (underestimated), overwritten by item _2 (overestimated) where that is non-missing.
gen info_underestimate = -attentioncheckinfo_1 // if you underestimated
replace info_underestimate = attentioncheckinfo_2 if attentioncheckinfo_2!=. // if you overestimated

* Actual gap between the respondent's belief and the benchmark wage
gen bias = point_more - meanwage_raw2

* Recall is "exact" if it equals the actual gap; "approximate" if it is within
* 10% of the actual gap but not exact. Both are defined for the treated only.
gen valid_exactly = bias==info_underestimate if personalinfo==1
gen valid_approx = cond(valid_exactly==1, 0, abs(bias-info_underestimate)<(0.1*abs(bias))) if personalinfo==1

tab valid_exactly
tab valid_approx


* Occupation Attention Check: 
* passing means attentionfinal==2; failures are dropped in the prespecified sample only
gen attentionocc = attentionfinal==2
tab attentionocc
drop if !attentionocc & prespecified==1 // 3,231 remain 


* Second attention check: 
* passing means the raw answer string is exactly "1,5"; defined for the prespecified sample only
gen attentionsecond = (attention_2=="1,5") if prespecified==1
tab attentionsecond



* restriction based on these attention checks happens later (once we test if they're correlated with treatment)

tab personalinfo
 
* Clean data:
* ---------------------------------------------------------------------------- *
	
* Cleaning pre-and-post treatment outside options
	* Harmonise the name so that pre and post variables share one pattern
	rename post_outsideoption posttreatment_oo
	
	* Expected wage change relative to current pre-tax earnings:
	* level difference, percent difference and log difference
	foreach t in pre post {
		gen `t'treatment_wagechange = `t'treatment_oo-pretaxwageearning
		gen `t'treatment_wagechange_pct = 100*(`t'treatment_wagechange/pretaxwageearning)
		gen `t'treatment_wagechange_dln = ln(`t'treatment_oo)-ln(pretaxwageearning)
	}
	
	* Detailed summaries: percent changes first, then log changes
	foreach sfx in pct dln {
		foreach t in pre post {
			su `t'treatment_wagechange_`sfx', d
		}
	}
	
	
	* Gap between the point belief (point_more) and own wage, in levels and percent
	gen beliefowngap = point_more-pretaxwageearning
	gen beliefowngap_pct = 100*(beliefowngap/pretaxwageearning)
	
	su beliefowngap_pct, d
	
* ---------------------------------------------------------------------------- *	
	
	
	
* post-treatment errors
* Post-treatment belief in levels: own wage plus the stated amount when
* posttreatmentbelief==22, own wage minus the stated amount when it is 23,
* missing otherwise.
gen belief_post = cond(posttreatmentbelief==22, pretaxwageearning+post_beliefmore, ///
	cond(posttreatmentbelief==23, pretaxwageearning-post_beliefless, .))

* Deviation of the post-treatment belief from the benchmark wage, in percent and in log points (x100)
gen bias_belief_post_pct = ((belief_post-meanwage_raw2)/meanwage_raw2)*100
gen bias_belief_post_log = (ln(belief_post)-ln(meanwage_raw2))*100
	
* Cleaning Reservation Wage:
	tab r_wage_employee
	
	* Free-text answers that destring cannot parse, paired position by position
	* with the value each one should become (decimal commas, stray comma, % signs)
	local typed `""0,01" "0,1" "0,5" "1,5" "2," "5%" "10%""'
	local fixed "0.01 0.1 0.5 1.5 2 5 10"
	local npairs : word count `typed'
	forvalues j = 1/`npairs' {
		local old : word `j' of `typed'
		local new : word `j' of `fixed'
		replace r_wage_employee = "`new'" if r_wage_employee=="`old'"
	}
	
	* force: any remaining non-numeric answer becomes missing
	destring r_wage_employee, replace force
	

* search costs
* reverse the 1-4 scale of both items (1<->4, 2<->3)
recode searchcosts_1 searchcosts_2 (4 = 1) (1 = 4) (2 = 3) (3 = 2)


* ---------------------------------------------------------------------------- *	

* Cleaning for Statistics
	* Map answer codes to numeric values (presumably bracket midpoints - confirm
	* against the questionnaire).
	* recode evaluates all rules against the ORIGINAL value, so a freshly assigned
	* value can never be picked up by a later rule.
	recode hoursworked (1 = 10) (2 = 23) (3 = 28) (4 = 33) (5 = 38) (6 = 43) (7 = 50)
	
	* BUG FIX: this used to be a chain of sequential replace statements. Code 11
	* was first set to 15, and the later rule "1500 if sizeemployer==15" then
	* turned those same observations into 1500, merging code 11 with code 15.
	* A single recode keeps the two categories apart.
	recode sizeemployer (9 = 3) (10 = 8) (11 = 15) (12 = 60) (13 = 150) (14 = 600) (15 = 1500) (16 = 3000)
	
	recode prior_info1 (4 = 0) (5 = 1) (6 = 2) (7 = 3) (8 = 4) (9 = 5) (10 = 8) (11 = 15) 
	
	* Two versions of the intended-raise variable, built from the same answer
	rename bargaining_2 bargaining_2_1
	gen bargaining_2_2 = bargaining_2_1
	
	table bargaining_2_2
	
	recode bargaining_2_1 (9 = 0) (1 = 0) (2 = 0.5) (3 = 1.5) (4 = 3.5) (5 = 7.5) (6 = 12.5) (7 = 17.5) (8 = 25)
	recode bargaining_2_2 (9 = .) (1 = 0) (2 = 0.5) (3 = 1.5) (4 = 3.5) (5 = 7.5) (6 = 12.5) (7 = 17.5) (8 = 25)
	* difference is whether they recode "no negotiation intended" to zero or missing
	
	
	* Slopes of the stated quit probabilities across the three scenarios:
	* over the full range (scenario 2 vs 3, divided by 20) and over each half
	* (divided by 10)
	gen quitelasticity_slope = (quitelasticity_2_11-quitelasticity_3_11)/20
	
	gen quitelasticity_slope_minus = (quitelasticity_2_11-quitelasticity_1_11)/10
	gen quitelasticity_slope_plus = (quitelasticity_1_11-quitelasticity_3_11)/10
	
	* This local is read by the winsorising loop further down; both must run in
	* the same do-file execution for the local to be in scope.
	local elasticvars "quitelasticity_1_11 quitelasticity_2_11 quitelasticity_3_11 quitelasticity_slope quitelasticity_slope_minus quitelasticity_slope_plus"
	
* Variable Definition (for each subsample)
* ---------------------------------------------------------------------------- *

* testing whether treatment predicts passing post-treatment attention checks
* Everything generated here is temporary: preserve/restore discards it again.
preserve
	
		* Belief bias relative to the benchmark wage, in levels and in percent
		gen bias_belief = point_more - meanwage_raw2
		gen bias_belief_pct = ((point_more - meanwage_raw2)/meanwage_raw2)*100
		winsor bias_belief_pct, p(0.02) gen(bias_belief_pct_w2)
		
		* Interaction of treatment with the winsorised bias
		gen bias_treated = personalinfo*bias_belief_pct_w2
	
		* One regression per attention-check outcome, heteroskedasticity-robust SEs
		reg attentionocc bias_belief_pct_w2 personalinfo bias_treated, vce(robust)
		reg attentionsecond bias_belief_pct_w2 personalinfo bias_treated, vce(robust)
	
restore
		

* merging on hypothesis guessing data
* Turn each hand-coded spreadsheet into a one-row-per-id .dta file in $temp.
* The survey data in memory is protected by preserve/restore.
preserve 
	forvalues f = 1/4 {
		
		* Variables from files 1-3 get the file number as suffix; file 4 gets suffix 5
		local sfx = cond(`f'==4, 5, `f')
		
		import excel "${exp_data}/hypothesis_guess_`f'.xlsx", firstrow clear
		
		keep id coding
		drop if id==.
		
		* clean variable "coding" (misspelling of the answers by hand-coders);
		* the corrections are specific to each file
		if `f'==1 {
			replace coding = "beliefs" if coding == "beliegs"
			replace coding = "beliefs, attention" if coding == "belifs, attention"
			replace coding = "dk" if coding == "dl"
			replace coding = "wages" if coding == "wagees"
		}
		if `f'==2 {
			replace coding = "beliefs" if coding == "beleifs"
			replace coding = "dk" if coding == "dl"
			replace coding = "dk" if coding == "ak"
		}
		replace coding = strtrim(coding)
		
		* correct: guessed the hypothesis; correctx: also counts "wages" and "beliefs"
		gen correct = coding=="correct"
		gen correctx = inlist(coding, "correct", "wages", "beliefs")
		
		* making unique by ID to enable merging, but erring on the side of classifying someone as correctly guessing:
		* within id keep only rows attaining the maximum of correct, then of correctx
		foreach flag in correct correctx {
			egen max`flag' = max(`flag'), by(id)
			drop if `flag'!=max`flag'
		}
		
		* any remaining ties within id: keep a single row
		bysort id: keep if _n==1
		
		rename coding coding`sfx'
		rename correct correct`sfx'
		rename correctx correctx`sfx'
		keep id correct`sfx' coding`sfx' correctx`sfx'
		
		save "${temp}/hypothesis`f'.dta", replace
	}

restore

* Attach the hand-coded hypothesis guesses to the survey data by id.
* keep(master match) keeps every survey observation and discards using-only rows;
* each _merge indicator is renamed so that the next merge can create a new one.
merge m:1 id using "$temp/hypothesis1.dta", keep(master match)
rename _merge merge1

merge m:1 id using "$temp/hypothesis2.dta", keep(master match) 
rename _merge merge2

* hypothesis3.dta is merged twice: the first copy is renamed to the *4 variables
* (indicator merge4), the second copy keeps the *3 names (indicator merge3).
* NOTE(review): as written, the *3 and *4 variables are therefore identical -
* confirm whether a separate fourth coding file was intended here.
merge m:1 id using "$temp/hypothesis3.dta", keep(master match) 
rename _merge merge4

rename correct3 correct4
rename correctx3 correctx4
rename coding3 coding4
merge m:1 id using "$temp/hypothesis3.dta", keep(master match)
rename _merge merge3

* hypothesis4.dta holds the variables with suffix 5 (coding5, correct5, correctx5)
merge m:1 id using "$temp/hypothesis4.dta", keep(master match) 
rename _merge merge5

* Combine the coders' classifications: take coding1 and fill still-empty
* values from coding2 ... coding5, in that order
gen coding = coding1
forvalues k = 2/5 {
	replace coding = coding`k' if coding==""
}

* Same first-non-missing rule for the two correctness indicators; both are set
* to missing when no coding is available at all.
* there are 2 obs who have nonmissing correct1 _and_ correct2, but neither of them get it correct under either variable
* correctx extends correct to those who guess the experiment is about beliefs or wages 
foreach flag in correct correctx {
	gen `flag' = `flag'1
	forvalues k = 2/5 {
		replace `flag' = `flag'`k' if `flag'==.
	}
	replace `flag' = . if coding==""
}



* wages, don't know, beliefs, labor market, attention, junk, correct

* Anything non-empty that is not one of the seven recognised categories becomes "other"
replace coding = strtrim(coding)
replace coding = "other" if coding!="" & ///
	!inlist(coding, "LM", "attention", "beliefs", "correct", "dk", "junk", "wages")
	
* Numeric version of the category; the code is the position in the list below (1-8)
gen codingnum = .
local pos = 0
foreach cat in correct dk LM attention wages beliefs junk other {
	local ++pos
	replace codingnum = `pos' if coding=="`cat'"
}

tabulate codingnum prespecified

* Same table after removing the prespecified==0 observations (data restored afterwards)
preserve 

drop if prespecified==0
tabulate codingnum prespecified

restore 

* Intermediate file from which both analysis samples are built
save "$temp/analysis_penultimate.dta", replace


* create data set for pooled sample and only the post-pilot data 
* Writes $temp/experiment0.dta (full sample) and $temp/experiment1.dta
* (prespecified sample only). Winsorising is done separately within each sample.
foreach sample in 0 1 {

	use "$temp/analysis_penultimate.dta", clear
	
	* sample 0 keeps everyone; sample 1 is restricted to the prespecified observations
	if `sample'==1 {
		keep if prespecified==1
	}
	
	
	* Variable Definition and Winsorizing based on restricted sample (0-1)
		gen bias_belief = point_more - meanwage_raw2
		gen bias_belief_pct = ((point_more - meanwage_raw2)/meanwage_raw2)
		
	* Rescale variables between (0-1): divide each listed variable by 100
		local pctvars pretreatment_wagechange_pct posttreatment_wagechange_pct beliefowngap_pct bias_belief_post_pct quitelasticity_1_11  job_search_11 bargaining_1_11 r_wage_employee quitelasticity_2_11 quitelasticity_3_11 bargaining_2_1 bargaining_2_2 bias_belief_post_log
		foreach v of local pctvars {
			replace `v'=`v'/100
		}


	* Winsorised copies at the 2% (_w2) and 5% (_w5) level.
	* `elasticvars' is a local defined earlier in this do-file.
	* added winsorization of reservation wage, quit elasticity variables, 27 July
		local towinsor bias_belief bias_belief_pct pretreatment_wagechange ///
					   pretreatment_wagechange_pct posttreatment_wagechange ///
					   posttreatment_wagechange_pct beliefowngap beliefowngap_pct ///
					   bias_belief_post_pct bias_belief_post_log sizeemployer r_wage_employee pretreatment_wagechange_dln posttreatment_wagechange_dln `elasticvars'
		foreach v of local towinsor {
				codebook `v'
				winsor `v', p(.02) gen(`v'_w2)
				winsor `v', p(.05) gen(`v'_w5)
		}

	* Direction of the belief error (a bias of exactly zero counts as underestimate)
		gen overestimate = bias_belief>0
		gen underestimate = bias_belief<=0
		
		
		
	* Variable Definition: Mean Salary benchmark vs Own Wage 
		gen truthpercentgap = (meanwage_raw2 - pretaxwageearning)/pretaxwageearning
		gen beliefpercentgap = (point_more - pretaxwageearning) / pretaxwageearning
		
	* Definition whether salary is higher or lower than average
		gen relative_salary = pretaxwageearning - meanwage_raw2
		gen overpaid = relative_salary>0
		
		
	* Create Variables for Statistics
		gen treatmentinfo = personalinfo
		
		tab placeofwork_1
		gen state1=placeofwork_1==492		// Nordrhein-Westfalen
		gen state2=placeofwork_1==90		// Bavaria
		gen state3=placeofwork_1==1			// Baden-Wuerttemberg
		
		* Turn the 1/2 codes into 0/1 indicators
		recode gender (1 = 0) (2 = 1)
		
		replace öffentlicherDienst = 0 if öffentlicherDienst==2 // civil service 
		
		replace tarif_ja_nein = 0 if tarif_ja_nein==2			
			
		gen beliefpercentgap_tr = beliefpercentgap*treatmentinfo	
		
	* no completed education / vocational training / university degree
		tab education1
		local level = 0
		foreach tier in low medium high {
			local ++level
			gen `tier'education = education1==`level'
		}
		
		
	* labels
	* NOTE(review): several labels say "in %" although these variables were
	* divided by 100 above - confirm the scaling intended for display.

		label var job_search_11 "Prob of Seeking New Job (in %)"
		forvalues s = 1/3 {
			label var quitelasticity_`s'_11 "Prob of Quitting Job (in %)"
		}
		label var quitelasticity_slope "Implied Quit Elasticity"
		label var pretreatment_wagechange_pct_w2 "Pre-Treat Beliefs: Own Wage Change (in %)"
		label var posttreatment_wagechange_pct_w2 "Post-Treat Beliefs: Own Wage Change (in %)"
		label var bargaining_1_11 "Prob of Negotiating for Pay Rise (in %)"
		label var bargaining_2_1 "Intended Magnitude of Raise (No Neg = 0)"
		label var bargaining_2_2 "Intended Magnitude of Raise (No Neg = Msg)"
		label var r_wage_employee "Reservation Wage Cut (in %)"
		label var r_wage_employee_w2 "Reservation Wage Cut (in %)"		
		* this replaces the label given to the same variable a few lines above
		label var posttreatment_wagechange_pct_w2 "Post-Treatment OO Belief (%)"
		label var bias_belief_post_pct_w2 "Post-Treatment Bias about Similar Worker Wage (%)"
		label var bias_belief_post_log_w2 "Post-Treatment Bias about Similar Worker Wage (log difference)"
		

		
		save "$temp/experiment`sample'.dta", replace
		
		
}
